Many of the figures and analysis methodology were based off of this tutorial from NYU. https://learn.gencore.bio.nyu.edu/rna-seq-analysis/gene-set-enrichment-analysis/
## Library Preparation
library(clusterProfiler)
library(ReactomePA)
library(DESeq2)
library(dplyr)
library(tidyverse)
library(presto)
library(ashr)
library(GOSemSim)
library(org.Ce.eg.db)
library(bookdown)
library(knitr)
library(DT)
library(htmltools)
library(kableExtra)
# Call functions
source("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/scripts/translateBioIDs.R")
source("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/scripts/callClusterProfilerFunc.R")
source("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/scripts/compileOntologyReport.R")
source("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/scripts/emptyTableMessage.R")
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # Calls translateBioIDs.R and geneOntologyAnalysis.R # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
compileOntologyReport <- function(result.obj, strain, fraction, l2fc_filter, padj_filter){
# Create comment structure for bookdown render
cat(sprintf("# %s %s {.title .tabset}\n\n", strain, fraction))
#cat(paste0("# ", strain," ", fraction, " {.title .tabset}\n\n"))
# Show exact call which invoked function
call_txt <- paste(deparse(match.call()), collapse = "\n")
cat("**Inputs for function call:**\n\n")
cat("```r\n")
cat(call_txt, "\n\n")
cat("```\n\n")
# Translate Wormbase IDs to ENTREZID
result.obj.list <- translateBioIDs(
DESeqResults.obj = result.obj,
bioID = "WORMBASE",
l2fc_filter = l2fc_filter,
padj_filter = padj_filter
)
# Run gruopGO(), enrichGO(), gseGO(), and goplot()
go.result.list <- callClusterProfilerFunc(
result.obj.list = result.obj.list,
OrgDb = org.Ce.eg.db)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # Print text and tables to console for bookdown rendering # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
knitDataTable <- function(df, tableName, fileExtension){
if(nrow(data.frame(df[[1]])) != 0){
table <- list(datatable(data.frame(df[[1]]),
extensions = 'Buttons',
rownames = FALSE,
caption = tableName,
options = list(
dom = "Bfrtip",
buttons = list('copy',
list(extend = 'csv', filename = sprintf("%s_%s_%s", gsub(" ", "_", tolower(strain)), tolower(fraction), fileExtension)),
list(extend = 'excel', filename = sprintf("%s_%s_%s", gsub(" ", "_", tolower(strain)), tolower(fraction), fileExtension))
),
pageLength = 5,
scrollX = TRUE)))
print(htmltools::tagList(table[[1]]))
cat("\n\n")
write.csv(df[[1]], file = sprintf("./results/%s_%s_%s.csv", gsub(" ", "_", tolower(strain)), tolower(fraction), fileExtension))
}else{emptyTableMessage(tableName)}
# clean up environment
rm(table)
}
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # Dysregulated Reactome Pathway GSE, GO GSE # # #
cat(paste0("\n\n## Dysregulated\n\n"))
# Reactome Pathway GSE
cat("**Reactome Pathway GSEA**")
knitDataTable(
df = go.result.list$react_gse,
tableName = " Reactome Gene Set Enrichment Analysis",
fileExtension = "Reactome_gsea"
)
# # # GSEA plot condiitonal rendering of RP GSEA plots
# Tabset for Reactome GSEA plots
cat("#### {.tabset .tabset-dropdown}\n\n")
# Print GSEA plot for each react pathway set identified
for (i in 1:nrow(data.frame(go.result.list$react_gse[[1]]))) {
if (i > 30) {break} # Dont print over 30 plots
cat(sprintf("##### %s \n\n", go.result.list$react_gse[[1]]$Description[i]))
print(gseaplot(
go.result.list$react_gse[[1]],
by = "all",
title = go.result.list$react_gse[[1]]$Description[i],
geneSetID = go.result.list$react_gse[[1]]$ID[i]))
cat("\n\n")
}
# GO GSE
cat("### \n\n") # Break out of previous tabset
cat("**GO GSEA**")
knitDataTable(
df = go.result.list$go_gse,
tableName = " GSE Of All Differentially Expressed Genes",
fileExtension = "GO_gsea"
)
# # # GSEA plot condiitonal rendering
# Tabset for GSEA plots
cat("#### {.tabset .tabset-dropdown}\n\n")
# Print GSEA plot for each set identified
for (i in 1:nrow(data.frame(go.result.list$go_gse[[1]]))) {
if (i > 30) {break} # Dont print over 30 plots
cat(sprintf("##### %s \n\n", go.result.list$go_gse[[1]]$Description[i]))
print(gseaplot(
go.result.list$go_gse[[1]],
by = "all",
title = go.result.list$go_gse[[1]]$Description[i],
geneSetID = go.result.list$go_gse[[1]]$ID[i]))
cat("\n\n")
}
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # Upregulated GO classification, GO over-representation, RP over-representation # # #
cat(paste0("\n\n## Upregulated\n\n"))
# GO classification
knitDataTable(
df = go.result.list$go_class_upreg,
tableName = " GO Classification of Upregulated Genes",
fileExtension = "GO_classification_upreg"
)
# GO over-representation
knitDataTable(
df = go.result.list$go_overrep_upreg,
tableName = " GO Over-representation Analysis of Upregulated Genes",
fileExtension = "GO_overrepresented_upreg"
)
# RP over-representation
knitDataTable(
df = go.result.list$react_overrep_upreg,
tableName = " Reactome Pathway Over-representation Analysis of Upregulated Genes",
fileExtension = "Reactome_overrepresented_upreg"
)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # Downregulated GO classification, GO over-representation, RP over-representation # # #
cat(paste0("\n\n## Downregulated\n\n"))
# GO classification
knitDataTable(
df = go.result.list$go_class_downreg,
tableName = " GO Classification of Downregulated Genes",
fileExtension = "GO_classification_downreg"
)
# GO over-representation
knitDataTable(
df = go.result.list$go_overrep_downreg,
tableName = " GO Over-representation Analysis of Downregulated Genes",
fileExtension = "GO_overrepresented_downreg"
)
# RP over-representation
knitDataTable(
df = go.result.list$react_overrep_downreg,
tableName = " Reactome Pathway Over-representation Analysis of Downregulated Genes",
fileExtension = "Reactome_overrepresented_downreg"
)
}
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # Translating Biological IDS # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # From WormBase to ENTREZID
translateBioIDs <- function (DESeqResults.obj, bioID, return.success = TRUE, l2fc_filter, padj_filter) {
if (bioID != "WORMBASE"){
stop("⛔️ ERROR: Only WORMBASE biological ID supported as input.")
}
# Convert DESeq2 results object to DataFrame, and edit structure
gene.df <- DESeqResults.obj %>%
data.frame() %>%
rownames_to_column("WORMBASE") %>%
dplyr::select(WORMBASE, log2FoldChange, pvalue, padj)
# Biological ID TranslatoR
gene_ids <- bitr(gene.df[,'WORMBASE'],
fromType = "WORMBASE",
toType = "ENTREZID",
OrgDb = org.Ce.eg.db)
# Record success of ID transfer
t1 <- table(gene.df$WORMBASE %in% gene_ids$WORMBASE)
# Clear duplicate genes following ID transfer
gene_ids <- gene_ids %>%
distinct(WORMBASE, .keep_all = T) %>%
column_to_rownames("WORMBASE")
# Add ENTREZID to L2FC carrying DataFrame
gene.df <- gene.df %>%
mutate(ENTREZID = gene_ids[WORMBASE, 1]) %>%
dplyr::select(ENTREZID, log2FoldChange, pvalue, padj) %>%
subset(!is.na(ENTREZID)) %>% # remove NA ENTREZ ID values
arrange(desc(log2FoldChange)) # <--- arrange genes by L2FC descending
rownames(gene.df) <- NULL
# # Subset L2FC Data # #
# Subset for highly varirable genes
variable.genes <- gene.df %>%
subset(abs(log2FoldChange) > l2fc_filter & padj < padj_filter & !isNA(padj))
rownames(variable.genes) <- NULL
# Subset for highly upregulated genes
upregulated.genes <- gene.df %>%
subset(log2FoldChange > l2fc_filter & padj < padj_filter & !isNA(padj))
rownames(upregulated.genes) <- NULL
# Subset for highly downregulated genes
downregulated.genes <- gene.df %>%
subset(log2FoldChange < -l2fc_filter & padj < padj_filter & !isNA(padj))
rownames(downregulated.genes) <- NULL
# Subset for all genes (universe DataFrame)
all.genes <- gene.df %>%
subset(!isNA(padj)) %>%
arrange(desc(log2FoldChange))
# Vectorized differentially expressed genes
vectorized <- all.genes[,2]
names(vectorized) <- all.genes[,1]
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # Print Results Following Subset # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Record transfer success
if (return.success == TRUE){
cat("\n")
cat("**Transfering WORMBASE gene IDs to ENTREZID:**\n\n")
cat("```r\n")
cat(paste0(t1[["FALSE"]]," gene IDs were not transfered from WORMBASE to ENTREZID \n"))
cat(paste0(t1[["TRUE"]]," gene IDs were successfully transfered from WORMBASE to ENTREZID \n\n"))
cat("```\n\n")
}
# report number of highly variable genes used for analysis
cat(sprintf("**Number of highly variable genes identified (with | L2FC | > %s and padj < %s) is:**\n\n", toString(l2fc_filter) , toString(padj_filter)))
cat("```r\n")
cat(length(variable.genes[[1]]))
cat("\n\n```\n\n")
# report number of highly variable genes used for analysis
cat(sprintf("**Number of highly upregulated genes identified (with L2FC > %s and padj < %s) is:**\n\n", toString(l2fc_filter) , toString(padj_filter)))
cat("```r\n")
cat(length(upregulated.genes[[1]]))
cat("\n\n```\n\n")
# report number of highly variable genes used for analysis
cat(sprintf("**Number of highly downregulated genes identified (with L2FC < -%s and padj < %s) is:**\n\n", toString(l2fc_filter) , toString(padj_filter)))
cat("```r\n")
cat(length(downregulated.genes[[1]]))
cat("\n\n```\n\n")
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # Return Data # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Save all data to list
gene.list <- list(
variable.genes,
upregulated.genes,
downregulated.genes,
all.genes,
vectorized
)
names(gene.list) <- c(
"variable_genes",
"upregulated_genes",
"downregulated_genes",
"all_genes",
"vectorized_all_DE"
)
return(gene.list)
}
# # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # Gene Ontology Enrichment Analysis # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # #
callClusterProfilerFunc <- function(result.obj.list, OrgDb){
# # Extract filtered lists from list container
geneNames.variable <- result.obj.list$variable_genes[,1]
geneNames.up <- result.obj.list$upregulated_genes[,1]
geneNames.down <- result.obj.list$downregulated_genes[,1]
geneList <- result.obj.list$vectorized_all_DE
universe <- result.obj.list$all_genes[,1]
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # Analysis of Upregulated Genes # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
## RUN WITH BP ONTOLOGY
# # GO Classification # #
go_class_upreg <- groupGO(
gene = geneNames.up,
OrgDb = OrgDb,
ont = "BP",
level = 3,
readable = TRUE) %>%
arrange(desc(Count))
# # GO Over-Representation Analysis # #
go_overrep_upreg <- enrichGO(
gene = geneNames.up,
universe = universe,
OrgDb = OrgDb,
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.1,
qvalueCutoff = 0.1,
readable = TRUE) %>%
arrange(desc(Count))
# # Reactome Pathway Over-Representation Analysis # #
reactome_overrep_upreg <- enrichPathway(
gene = geneNames.up,
universe = universe,
organism = 'celegans',
pvalueCutoff = 0.05,
qvalueCutoff = 0.1,
minGSSize = 20,
maxGSSize = 500,
readable = TRUE) %>%
arrange(desc(Count))
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # Analysis of Downregulated Genes # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # GO Classification # #
go_class_downreg <- groupGO(
gene = geneNames.down,
OrgDb = OrgDb,
ont = "BP",
level = 3,
readable = TRUE) %>%
arrange(desc(Count))
# # GO Over-Representation Analysis # #
go_overrep_downreg <- enrichGO(
gene = geneNames.down,
universe = universe,
OrgDb = OrgDb,
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.1,
qvalueCutoff = 0.1,
readable = TRUE) %>%
arrange(desc(Count))
# # Reactome Pathway Over-Representation Analysis # #
reactome_overrep_downreg <- enrichPathway(
gene = geneNames.down,
universe = universe,
organism = 'celegans',
pvalueCutoff = 0.05,
qvalueCutoff = 0.1,
minGSSize = 20,
maxGSSize = 500,
readable = TRUE) %>%
arrange(desc(Count))
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # GO Gene Set Enrichment Analysis # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
go_gene_set_enrichment <- gseGO(
geneList = geneList,
OrgDb = OrgDb,
ont = "BP",
minGSSize = 20,
maxGSSize = 500,
pvalueCutoff = 0.1,
verbose = FALSE
)
# change gene IDs from ENTREZID to symbol
go_gene_set_enrichment <- setReadable(go_gene_set_enrichment, OrgDb = OrgDb, keyType="ENTREZID")
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # Reactome Enrichment Analysis # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
reactome_gene_set_enrichment <- gsePathway(
geneList = geneList,
organism = 'celegans',
pvalueCutoff = 0.2,
pAdjustMethod = "BH",
minGSSize = 20,
maxGSSize = 500,
verbose = FALSE
)
# change Reactome GSE gene IDs from ENTREZID to symbol
reactome_gene_set_enrichment <- setReadable(reactome_gene_set_enrichment, OrgDb = OrgDb, keyType="ENTREZID")
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # Return Results # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
go_results <- list(
list(go_class_upreg),
list(go_overrep_upreg),
list(reactome_overrep_upreg),
list(go_class_downreg),
list(go_overrep_downreg),
list(reactome_overrep_downreg),
list(go_gene_set_enrichment),
list(reactome_gene_set_enrichment)
)
names(go_results) <- c(
"go_class_upreg",
"go_overrep_upreg",
"react_overrep_upreg",
"go_class_downreg",
"go_overrep_downreg",
"react_overrep_downreg",
"go_gse",
"react_gse"
)
return(go_results)
}
# Lysate
lys <- readRDS("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/intermediate_data/deseq2_dds_objects/lotr1_isolatedStrain_Lysate_BatchCor_dds.rds")
# Polysome
ply <- readRDS("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/intermediate_data/deseq2_dds_objects/lotr1_isolatedStrain_Polysome_BatchCor_dds.rds")
# Full KO vs WT
full_vs_wt.lys <- results(lys, name = "strain_DUP207_vs_DUP167")
full_vs_wt.ply <- results(ply, name = "strain_DUP207_vs_DUP167")
# Tudor KO vs WT
tudor_vs_wt.lys <- results(lys, name = "strain_DUP185_vs_DUP167")
tudor_vs_wt.ply <- results(ply, name = "strain_DUP185_vs_DUP167")
# LOTUS KO vs WT
lotus_vs_wt.lys <- results(lys, name = "strain_DUP217_vs_DUP167")
lotus_vs_wt.ply <- results(ply, name = "strain_DUP217_vs_DUP167")
Inputs for function call:
compileOntologyReport(result.obj = full_vs_wt.lys, strain = "Full LOTR-1 KO vs WT",
fraction = "Lysate", l2fc_filter = 0.8, padj_filter = 0.1)
Transfering WORMBASE gene IDs to ENTREZID:
542 gene IDs were not transfered from WORMBASE to ENTREZID
15111 gene IDs were successfully transfered from WORMBASE to ENTREZID
Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.1) is:
300
Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.1) is:
254
Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.1) is:
46
Table GO Over-representation Analysis of Downregulated Genes has no rows.